# script for creating files with fuel calculations and local pollution marginal damages
# for entrance/exits, allowing for larger and smaller study areas
    # 1) Down samples points along tracks - calculates values for fuel calculation
    # 2) Down samples further - determines marginal damage at each downsampled point
    # 3) Aggregates to track level (without local pollution) with outputs that can be used to calculate fuel and emissions
        # does this for different definitions of study area

# Imports #####################################################################
# for use with anaconda
try:
    import archook #The module which locates arcgis
    archook.get_arcpy()
    import arcpy
except ImportError:
    error
    # do whatever you do if arcpy isnt there.

# IMPORTS
import numpy as np
import datetime
import pandas
import time
import os

import dist_speed_functions as dist_funs

arcpy.env.overwriteOutput=True
#numpy.set_printoptions(threshold=numpy.nan)

###############################################################################
# Options

data_folder = r"H:\My Drive\Boats\ReplicationCode\data"

# Inputs 
tracks_file = os.path.join( data_folder ,r"AIS\voyages.gdb\wc_no_bound%s" )
tracks_where = " ( ( (PORT1<=13  and PORT2 IS NULL and RIGHT1=0) or (PORT2<=13  and PORT1 IS NULL and RIGHT2=0) ) and INTERP_FLAG=0 )" # 
out_tags = ["_2009","_2010" , "_2011","_2012","_2013","_2014","_2015","_2016"] # 

# TESTING
out_tags = ["_2009"] # 


# variables to include from tracks file
tracks_var_list = ['OID@','IMO','MMSI','PORT1','PORT2','TIME1','TIME2'] 

pnts_char_list = ['MMSI','IMO'] # 'PORT1','PORT2','TIME1','TIME2','RIGHT1','RIGHT2','VesselType','Length','Width']
pnts_var_list = ['OID@','SHAPE@X','SHAPE@Y','SHAPE@M'] + pnts_char_list

## Boundary Files
## Boundary Files
CAeca2009_file = os.path.join( data_folder , r"boundaries\CAeca_200907.shp" )
CAeca2011_file = os.path.join( data_folder , r"boundaries\CAeca_201112.shp" )
NAbounds_file = os.path.join( data_folder , r"boundaries\NorthAmericaBounds.shp" )
StudyAreabounds_file = os.path.join( data_folder , r"boundaries\StudyAreaBounds.shp" )
NAECA_file = os.path.join( data_folder , r"boundaries\NA_ECA_polygon.shp" )

# Damage grids
# AP2 + distance measures 
#Dgrid_file = r"C:\Users\rklotz\Documents\data\apeep_maps.gdb\offshore_grid_D"
Dgrid_file_proj = os.path.join( data_folder , r"damage_grids\apeep_dist_pop.shp" )

# InMAP
Dgrid_InMap_file_proj = os.path.join( data_folder , r"damage_grids\inmap.shp" ) # this is updated version

# downsampling time for fuel calculations
fuel_ds = '10min' # intervals in fuel calcs
apeep_ds = '30min' # intervals for local pollution damages and apeep and for final output 
                    # need something here fairly fine to pick up speed profile changes

max_kmh = 70 # maximum km/hr - smoothed otherwise

# densifying distance
dense_dist = "40 kilometers" # km to densify at

spatial_reference = arcpy.SpatialReference(4326) # GCS_WGS_1984
spatial_reference_proj = arcpy.SpatialReference(102010) 

### OUTPUT
# location for tracks csvs (just enter and exits, combines all years)
track_csv_file= os.path.join( data_folder ,r"AIS\voyages\voyages_entexit_nobound.csv")

###############################################################################
# reproject damages grid 
#print("Projecting Damages Grid")
#arcpy.Project_management(Dgrid_file, Dgrid_file_proj, spatial_reference)

# load the ECA boundaries
#print("Load ECA Boundaries")
#CAeca2009_geom = arcpy.CopyFeatures_management(CAeca2009_file,arcpy.Geometry())[0]
#CAeca2011_geom = arcpy.CopyFeatures_management(CAeca2011_file,arcpy.Geometry())[0]
   
# merging CA ECA boundaries
print("Merging ECA")
eca_na_fc = "in_memory/eca_fc"
arcpy.Merge_management([CAeca2009_file, CAeca2011_file, NAbounds_file, StudyAreabounds_file , NAECA_file ] , eca_na_fc)
    

# Process (looping through all years)
# df_pnts_list = [] 
df_tracks_list = []
for out_tag in out_tags :
    print(out_tag)
    start_time = time.time()
  
    print("Make feature layer")
    arcpy.management.MakeFeatureLayer(tracks_file % out_tag ,"lines_fc",tracks_where) 

    print("Densifying")
    tracks_dense = r"in_memory/tracks_dense"
    arcpy.GeodeticDensify_management ("lines_fc", tracks_dense, "GEODESIC", dense_dist)
    arcpy.Delete_management("lines_fc")
      
    # now loading to pandas table (for use later)
    df_tracks_in = pandas.DataFrame( arcpy.da.FeatureClassToNumPyArray(
                                    tracks_dense ,field_names=tracks_var_list, where_clause=tracks_where ,
                                     null_value = -9999, explode_to_points= False,
                                    spatial_reference = spatial_reference ) )
    df_tracks_in = df_tracks_in.rename(columns={'OID@':'OID'})
    if out_tag in ["_2015", "_2016"] :
        df_tracks_in['IMO'] = df_tracks_in["IMO"].str.lstrip("IMO").astype(int) # stripping IMO
    
    df_tracks_in['TIME'] = np.where(df_tracks_in.PORT1==-9999,df_tracks_in.TIME2,df_tracks_in.TIME1)
    df_tracks_in['AIS_YEAR'] =   int(filter(str.isdigit, out_tag))
    
    
    # converting to include utc (doesn't change times at all)
    #df_tracks_in.TrackStartTime = pandas.to_datetime( df_tracks_in.TrackStartTime , utc=True)
            
    # start timer
    print(time.time() - start_time)
    
    print("Load points to dataframe")
    # load points into dataframe
    df_points = pandas.DataFrame( arcpy.da.FeatureClassToNumPyArray(
                                    tracks_dense ,pnts_var_list,where_clause=tracks_where ,
                                     null_value = -9999, explode_to_points= True,
                                    spatial_reference = spatial_reference ) )
    #arcpy.Delete_management(tracks_dense)
    
    # stripping IMO string from IMO codes
    if out_tag in ["_2015", "_2016"] :
        df_points['IMO'] = df_points["IMO"].str.lstrip("IMO").astype(int)
    #df_points['IMO'] = df_points['IMO'].map( lambda x: x.lstrip('IMO') )
        
    # make time column
    ### here adjusting for dates in M stored as local time (need to convert to UTC)
    hours_added = datetime.timedelta(hours = 5)    
    df_points['time'] = pandas.to_datetime(df_points['SHAPE@M'],unit='s',utc=True) - hours_added  
    df_points = df_points.drop(labels='SHAPE@M' , axis=1)
    df_points = df_points.rename(columns={'SHAPE@X':'X','SHAPE@Y':'Y','OID@':'OID'})
       
    # down sampling (taking first point)
    df_points['date_ds'] = df_points['time'].dt.floor(fuel_ds) # 'H' 
    df_points_ds = df_points.groupby(by=['OID','date_ds'],as_index=False).first()
    
    # add last points of track
    df_points_ds = df_points_ds.append( df_points.groupby(by=['OID'],as_index=False).last() , sort=True )
    df_points_ds = df_points_ds.drop_duplicates(subset=['OID','time'],keep='first') # dropping duplicates (last point already included)
    df_points_ds = df_points_ds.sort_values(by=['OID','time'])
    
    #df_points.to_csv('track_points.csv')
    del df_points
    print(time.time() - start_time)

      # check if points within CA ECAs
    print('Calculating distance and in ECA')
    df_points_ds = dist_funs.checkECA_NA(df_points_ds,eca_na_fc,spatial_reference)
    df_points_ds = dist_funs.calcDist(df_points_ds,spatial_reference,spatial_reference_proj)   
    print(time.time() - start_time)
    
    # calculate time on each segment (first point is 0)
    df_points_ds = df_points_ds.sort_values(by=['OID','time'])
    df_points_ds['d_time_hr'] = ( df_points_ds.groupby('OID')['time'].diff().dt.total_seconds()/3600 ).fillna(0)
    
    # calculate speed
    df_points_ds['kmh'] = df_points_ds['d_km'] / df_points_ds['d_time_hr']
    
    # smooth speed
    df_points_ds.loc[df_points_ds.kmh>max_kmh,'kmh'] = np.nan # set really fast values to nan
    # the reset index here only works if grouping on single vessel 
    # doing a 3 segment interval centered at current point and requires 1 observation
    df_points_ds['kmh_rolling'] = df_points_ds['kmh'].groupby(df_points_ds['OID']).rolling(3,center=False,min_periods=1).mean().reset_index(0,drop=True)
    df_points_ds['kmh'] = np.where(df_points_ds.kmh.isnull, df_points_ds.kmh_rolling , df_points_ds.kmh)    
    
    # calculate time weighted speed^3
    df_points_ds['s3_dt'] = df_points_ds['kmh']**3 * df_points_ds['d_time_hr']
    df_points_ds['s3_dt_eca09'] = df_points_ds['s3_dt'] * df_points_ds['in_eca09']
    df_points_ds['s3_dt_eca11'] = df_points_ds['s3_dt'] * df_points_ds['in_eca11']
    df_points_ds['s3_dt_naeca'] = df_points_ds['s3_dt'] * df_points_ds['in_naeca']
    df_points_ds['s3_dt_studyarea'] = df_points_ds['s3_dt'] * df_points_ds['studyarea']
    
    df_points_ds['km_onland'] = df_points_ds['d_km'] * df_points_ds['onland']
    df_points_ds['km_studyarea'] = df_points_ds['d_km'] * df_points_ds['studyarea']
    df_points_ds['km_naeca'] = df_points_ds['d_km'] * df_points_ds['in_naeca']
    
    print(time.time() - start_time) 
            
    ###########################################################################
      
    
    print('Down sampling for Marginal Damage Join') # also selects only necessary variables
    df_points_ds['date_ds_apeep'] = df_points_ds['time'].dt.floor(apeep_ds) # 'H'
    df_points_apeep = df_points_ds[['OID','date_ds_apeep','s3_dt','s3_dt_eca09','s3_dt_eca11','s3_dt_naeca','s3_dt_studyarea','d_km','d_time_hr','km_onland']].groupby(by=['OID','date_ds_apeep'],as_index=False).sum()
    
    # dropping those with no distance
    df_points_apeep = df_points_apeep[df_points_apeep.d_km!=0].copy()
    
    # adding X Y coordinates
    df_XY_apeep = df_points_ds[['OID','date_ds_apeep','X','Y']].groupby(by=['OID','date_ds_apeep'],as_index=False).first()
    df_points_apeep = df_points_apeep.merge(df_XY_apeep,on=['OID','date_ds_apeep'])
    
    df_points_apeep['kmh'] = df_points_apeep['d_km']/df_points_apeep['d_time_hr'] # calculating speed
    
    # calculating indicators that is fraction of total power required
    df_points_apeep['in_eca09'] = df_points_apeep['s3_dt_eca09']/df_points_apeep['s3_dt']
    df_points_apeep['in_eca11'] = df_points_apeep['s3_dt_eca11']/df_points_apeep['s3_dt']
    df_points_apeep['in_naeca'] = df_points_apeep['s3_dt_naeca']/df_points_apeep['s3_dt']
    df_points_apeep['in_studyarea'] = df_points_apeep['s3_dt_studyarea']/df_points_apeep['s3_dt']
    df_points_apeep['onland'] = df_points_apeep['km_onland']/df_points_apeep['d_km']
        
    # cumulative distance
    df_points_apeep['km_cum'] = df_points_apeep.groupby(['OID'])['d_km'].cumsum()

    # create point index
    df_points_apeep['pnt_ID'] = df_points_apeep.index

    fc_ds_points_apeep = 'in_memory/ds_points'
    # list of variables to add to downsample points layer
    vars_for_pnts_lyr = ['pnt_ID','OID','X','Y','onland' ,'s3_dt' , 'in_naeca' , 'in_eca09', 'in_eca11' , 'in_studyarea' ]
    arcpy.da.NumPyArrayToFeatureClass (df_points_apeep[ vars_for_pnts_lyr ].to_records(index=False) ,  fc_ds_points_apeep , (u'X',u'Y') , spatial_reference=spatial_reference)

    print("Joining damages")
    fc_ds_points_tmp = 'in_memory/ds_points_merged_tmp' 
    fc_ds_points_merged = 'in_memory/ds_points_merged' 
    # AP2
    arcpy.SpatialJoin_analysis(fc_ds_points_apeep, Dgrid_file_proj, fc_ds_points_tmp, join_operation='JOIN_ONE_TO_ONE' 
                               , join_type='KEEP_ALL' , match_option='WITHIN' )
    # InMAP
    arcpy.SpatialJoin_analysis(fc_ds_points_tmp, Dgrid_InMap_file_proj, fc_ds_points_merged, join_operation='JOIN_ONE_TO_ONE' 
                               , join_type='KEEP_ALL' , match_option='WITHIN' )
    arcpy.Delete_management(fc_ds_points_apeep)
    arcpy.Delete_management(fc_ds_points_tmp)   
    
    # now merging damages into table and saving
    df_points_D = pandas.DataFrame( arcpy.da.FeatureClassToNumPyArray(
                                    fc_ds_points_merged,field_names=['pnt_ID','GRID_ID','kmpop_us','kmpop_ca','kmpop_la','kmpop_lasf' \
                                    ,'distUScoas','D_PM','D_SO2_SO4','D_PM_inm','D_SO2_inm','D_NOx_inm','D_VOC_inm','D_NH3_inm'], where_clause="",
                                     null_value = -9999, explode_to_points= False,
                                    spatial_reference = spatial_reference ) )
    df_points_apeep = df_points_apeep.merge(df_points_D,on='pnt_ID',how='left')
    
    # replace points without damage estimates
    # if missing use lowest observed marginal damage on that track
    # if still missing assing to lowest observed 
    df_points_apeep = df_points_apeep.replace(-9999,np.nan)
    pol_vars = ['D_PM','D_SO2_SO4','D_PM_inm','D_SO2_inm','D_NOx_inm','D_VOC_inm','D_NH3_inm']
    for v in pol_vars :
        df_points_apeep[v] = df_points_apeep[v].fillna(df_points_apeep.groupby('OID')[v].transform('min'))
        df_points_apeep[v] = df_points_apeep[v].fillna(df_points_apeep[v].min())
    
    
    # collapsing to different levels
    # keeping only variables necessary for fuel and emissions calcs
    # calculating each segements share of the track
    df_points_apeep = df_points_apeep.merge(df_points_apeep[['OID','d_km']].groupby(by='OID').sum(),on='OID',how='left',suffixes=['','_tot'])
    df_points_apeep['shr'] = df_points_apeep.d_km / df_points_apeep.d_km_tot
    
    df_points_apeep['in_200km'] = ( df_points_apeep.distUScoas <= 200 ) * 1
    df_points_apeep['in_250km'] = ( df_points_apeep.distUScoas <= 250 ) * 1
    df_points_apeep['in_300km'] = ( df_points_apeep.distUScoas <= 300 ) * 1
    df_points_apeep['in_350km'] = ( df_points_apeep.distUScoas <= 350 ) * 1
    df_points_apeep['in_400km'] = ( df_points_apeep.distUScoas <= 400 ) * 1
    #df_points_apeep['in_500km'] = ( df_points_apeep.distUScoas <= 500 ) * 1
    #df_points_apeep['in_600km'] = ( df_points_apeep.distUScoas <= 600 ) * 1
    
    # weighted S3
    df_points_apeep['s3_PM_inm'] = df_points_apeep['s3_dt']*df_points_apeep['D_PM_inm']
    df_points_apeep['s3_SO2_inm'] = df_points_apeep['s3_dt']*df_points_apeep['D_SO2_inm']
    df_points_apeep['s3_NOx_inm'] = df_points_apeep['s3_dt']*df_points_apeep['D_NOx_inm']
    df_points_apeep['s3_VOC_inm'] = df_points_apeep['s3_dt']*df_points_apeep['D_VOC_inm']
    df_points_apeep['s3_NH3_inm'] = df_points_apeep['s3_dt']*df_points_apeep['D_NH3_inm']
    df_points_apeep['s3_PM'] = df_points_apeep['s3_dt']*df_points_apeep['D_PM']
    df_points_apeep['s3_SO2'] = df_points_apeep['s3_dt']*df_points_apeep['D_SO2_SO4']
    
    # weighted hours (for aux fuel consumption)
    df_points_apeep['hrs_PM_inm'] = df_points_apeep['d_time_hr']*df_points_apeep['D_PM_inm']
    df_points_apeep['hrs_SO2_inm'] = df_points_apeep['d_time_hr']*df_points_apeep['D_SO2_inm']
    df_points_apeep['hrs_NOx_inm'] = df_points_apeep['d_time_hr']*df_points_apeep['D_NOx_inm']
    df_points_apeep['hrs_VOC_inm'] = df_points_apeep['d_time_hr']*df_points_apeep['D_VOC_inm']
    df_points_apeep['hrs_NH3_inm'] = df_points_apeep['d_time_hr']*df_points_apeep['D_NH3_inm']
    df_points_apeep['hrs_PM'] = df_points_apeep['d_time_hr']*df_points_apeep['D_PM']
    df_points_apeep['hrs_SO2'] = df_points_apeep['d_time_hr']*df_points_apeep['D_SO2_SO4']
    
      
    weight_vars = ['in_200km','in_250km','in_300km','in_350km','in_400km'] # 'in_studyarea','in_eca09','in_naeca',
    collapse_vars = ['s3_dt','d_km','d_time_hr','shr',
                     's3_PM_inm','s3_SO2_inm','s3_NOx_inm','s3_VOC_inm','s3_NH3_inm','s3_PM','s3_SO2',
                     'hrs_PM_inm','hrs_SO2_inm','hrs_NOx_inm','hrs_VOC_inm','hrs_NH3_inm','hrs_PM','hrs_SO2']
    
    d_names = {'s3_dt' : 's3','d_km':'km','d_time_hr':'hrs','shr':'shr' ,
            's3_PM_inm':'s3_PM_inm','s3_SO2_inm':'s3_SO2_inm','s3_NOx_inm':'s3_NOx_inm','s3_VOC_inm':'s3_VOC_inm','s3_NH3_inm':'s3_NH3_inm','s3_PM':'s3_PM','s3_SO2':'s3_SO2',
            'hrs_PM_inm':'hrs_PM_inm','hrs_SO2_inm':'hrs_SO2_inm','hrs_NOx_inm':'hrs_NOx_inm','hrs_VOC_inm':'hrs_VOC_inm','hrs_NH3_inm':'hrs_NH3_inm','hrs_PM':'hrs_PM','hrs_SO2':'hrs_SO2' }

    tmp = df_points_apeep.copy()    
    w_list = []
    for w in weight_vars :
        for v in collapse_vars :
            tmp[d_names[v]+"_"+w] = tmp[w] * tmp[v] # overwrite value with weighted
            
        w_vars = [d_names[sub] + '_' + w for sub in collapse_vars]
        w_vars_noshr = [d_names[sub] + '_' + w for sub in collapse_vars if not sub=="shr" ]
    
        tmp_w = tmp[['OID']+w_vars].groupby(['OID'],as_index=False ).sum()
        
        # naning all rows when line track is totally within the desired range
        tmp_w.loc[tmp_w['shr_'+w]>0.999,w_vars_noshr] = np.nan
        tmp_w = tmp_w.drop(columns='shr_'+w)
        
        # dropping all columns that 
        
        w_list.append(tmp_w)
        del tmp_w 
    
    # combining all levels of aggregation
    df_tot = reduce(lambda x, y: pandas.merge(x, y, on = 'OID'), w_list)    
    del w_list
   
    # merge in other characteristics   
    df_tot = df_tot.merge(df_tracks_in[['OID','MMSI','TIME','AIS_YEAR']],how='left',on='OID')

    
    df_tracks_list.append(df_tot)

    del fc_ds_points_merged
    del df_points_ds

print("Combining All Years")
df_tracks = pandas.concat(df_tracks_list)

print("Saving CSV")
df_tracks.to_csv(track_csv_file,sep=',',index=False,header=True)

# take output file and load it when creating final tracks
    # merge to tracks file by TIME and MMSI
    # calculate pollution, fuel consumption based on s3 and hour sums and vessel chars
